import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# Load the TidyTuesday (2019-09-17) tables straight from GitHub: yearly park
# visits, yearly gas prices, state populations and park locations.
parks = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-09-17/national_parks.csv")
gas = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-09-17/gas_price.csv")
pop = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-09-17/state_pop.csv")
locations = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-09-17/locations.csv")

# "year" is handled as text here because some rows contain "Total" instead of
# a year; drop those rows so the column can be cast to int. Missing values are
# treated as matches (na=True) so that they are dropped as well.
# `.copy()` makes the filtered result an independent frame, so the column
# assignment below does not raise SettingWithCopyWarning.
parks = parks[~parks["year"].str.contains("Total", na=True)].copy()
parks["year"] = parks["year"].astype("int")

# Earliest year left in the cleaned data (displayed as the cell output).
parks.year.min()
1904
# Sum visitors over all years for each (unit_name, region) pair and order the
# result from most to least visited.
ranks = (
    parks.groupby(["unit_name", "region"])["visitors"]
    .sum()
    .reset_index()
    .sort_values(by="visitors", ascending=False)
)

# Keep the name and all-time total of the five highest-ranked units.
partial = ranks.loc[:, ["unit_name", "visitors"]].head(5)
partial
| | unit_name | visitors |
|---|---|---|
| 33 | Blue Ridge Parkway | 871922828.0 |
| 152 | Golden Gate National Recreation Area | 611031225.0 |
| 162 | Great Smoky Mountains National Park | 521947058.0 |
| 253 | Natchez Trace Parkway | 443145232.0 |
| 213 | Lake Mead National Recreation Area | 411700377.0 |
# Yearly visitor counts restricted to the top-five units in `partial`.
# The right join keeps only units present in `partial`; because both frames
# carry a "visitors" column, pandas suffixes them (_x = yearly value,
# _y = all-time total) and they are renamed to readable labels.
bypark = (
    parks[["unit_name", "year", "visitors"]]
    .merge(partial, how="right", on="unit_name")
    .rename(
        columns={
            "visitors_x": "Visitors",
            "visitors_y": "Visitors (All-Time)",
            "unit_name": "Park Name",
        }
    )
    .sort_values(by="year")
)
bypark.head(3)
| | Park Name | year | Visitors | Visitors (All-Time) |
|---|---|---|---|---|
| 120 | Great Smoky Mountains National Park | 1931 | 154000.0 | 521947058.0 |
| 205 | Great Smoky Mountains National Park | 1932 | 300000.0 | 521947058.0 |
| 204 | Great Smoky Mountains National Park | 1933 | 375000.0 | 521947058.0 |
# Interactive line chart of yearly visitors, one coloured line per park.
fig = px.line(
    bypark,
    x="year",
    y="Visitors",
    color="Park Name",
    title="Number of Visitors Per Year at the Five Most Visited US National Parks (1931-2016)",
)
# Add a range slider under the x-axis so a sub-range of years can be selected.
fig.update_xaxes(rangeslider_visible=True)
fig.show(renderer="notebook")
# Also save a standalone HTML copy of the chart.
fig.write_html("fig1_line.html")
Figure 2: This graph represents the number of visitors over time
Line graph showing the trends in yearly visitor numbers at the five most popular US national parks. All five sites show a clear increase in visitors over time, although some sites were established after record-keeping began (for example, Golden Gate National Recreation Area was established in the 1970s). Data
import descartes
import geopandas as geo
from shapely.geometry import Point, Polygon
%matplotlib inline
# Read the "naturalearth_lowres" sample dataset that geopandas exposes through
# its `datasets` helper.
# NOTE(review): `geopandas.datasets` is deprecated in recent geopandas
# releases — confirm the installed version still provides it.
earth = geo.read_file(
    geo.datasets.get_path("naturalearth_lowres")
)
# Keep only the columns used for the map, then attach the columns of
# `locations` by GNIS id. The left join keeps every visit record even when no
# matching location row exists.
park = parks[['year', 'gnis_id', 'parkname', 'region', 'state', 'unit_name', 'unit_type', 'visitors']]
parks = park.merge(locations, on='gnis_id', how='left')

# Drop the rows whose longitude equals the maximum longitude in the data
# (presumably an outlier that would distort the map — TODO confirm), and any
# row still labelled "Total" in the year column.
parks = parks[(parks['lon'] != parks['lon'].max()) & (parks['year'] != 'Total')]

# Sort by year so the records are in chronological order.
parks = parks.sort_values(by='year')
# Animated map: one marker per record placed at (lon, lat), coloured by
# visitor count, with one animation frame per year.
fig = px.scatter_mapbox(
    parks,
    lon="lon",
    lat="lat",
    animation_frame="year",
    color="visitors",
    hover_name="unit_name",
    hover_data=["visitors"],
    zoom=2,
)
fig["layout"].pop("updatemenus")  # optional, drop animation buttons
fig.update_layout(
    mapbox_style="open-street-map",
    margin=dict(r=0, t=60, l=0, b=0),
    title_text="Number of Visitors at US National Sites from 1929 to 2016",
)
fig.show(renderer="notebook")
# Also save a standalone HTML copy of the map.
fig.write_html("fig2_map.html")
# Reload the raw visits table, replacing the location-merged `parks` frame
# built for the map.
parks = pd.read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-09-17/national_parks.csv")

# Drop the rows whose "year" contains "Total" so the column can be cast to
# int. Missing values are treated as matches (na=True) so they are dropped
# too. `.copy()` makes the filtered result an independent frame, so the
# column assignment below does not raise SettingWithCopyWarning.
parks = parks[~parks["year"].str.contains("Total", na=True)].copy()
parks["year"] = parks["year"].astype("int")
import plotly.io as pio
import plotly.express as px
# Static line chart comparing the two gas price series over time.
plt.rcParams["font.size"] = 13
plt.figure(figsize=(12, 7))
sns.set_style("whitegrid")

# Two series from `gas`, drawn on the same axes.
sns.lineplot(x="year", y="gas_current", data=gas, color="red", label="Current (Nominal)")
sns.lineplot(x="year", y="gas_constant", data=gas, color="orange", label="Constant (Real)")

plt.title("Average Gas Prices in the US from 1929 to 2016", pad=10)
plt.xlabel("Year", labelpad=10)
plt.ylabel("Price (US Dollars)", labelpad=10)

# Emits a blank line in the cell output before the figure is saved.
print()
plt.savefig("fig3_gas.jpeg")
# Total population per year, summed over all rows of `pop`.
poptot = pop.groupby("year")["pop"].sum().reset_index()

# Visit records joined to gas prices (right join on year). Not used again in
# the visible part of this notebook.
merge = parks.merge(gas, how="right", on="year")

# Yearly visitor totals, restricted (inner joins) to the years that also have
# a gas price and a population figure.
totals = (
    parks.groupby("year")["visitors"]
    .sum()
    .reset_index()
    .merge(gas, how="inner", on="year")
    .merge(poptot, how="inner", on="year")
)
totals["Visits per Capita"] = totals["visitors"] / totals["pop"]

# Rename to the labels the plot displays.
totals = totals.rename(
    columns={
        "gas_constant": "Constant Gas Price ($ per gallon)",
        "year": "Year",
    }
)
#sns.scatterplot(data=totals, x="gas_current", y="percap", label="Nominal Dollars")
import plotly.io as pio
import plotly.express as px
# Select the default template BEFORE building any figure: plotly applies the
# default template when a figure is created, so assigning it afterwards leaves
# an already-built figure unchanged.
pio.templates.default = "plotly_white"

# Scatter of constant-dollar gas price against visits per capita, with the
# points coloured by year on a custom continuous colour scale.
fig = px.scatter(
    totals,
    x="Constant Gas Price ($ per gallon)",
    y="Visits per Capita",
    color="Year",
    title="Gas Prices vs. Number of Visits to US National Sites per Capita from 1929 to 2015",
    color_continuous_scale=["#5CDA56", "#4DC8C3", "#322FCB", "#B838F4", "#F79DDC"],
)

# Line version of the same relationship; it is built but neither shown nor
# saved in this cell.
fig1 = px.line(totals, x="Constant Gas Price ($ per gallon)", y="Visits per Capita")

fig.show(renderer='notebook')
# Also save a standalone HTML copy of the scatter plot.
fig.write_html("fig4_scatter.html")
print()
# Yearly totals: population summed over all rows of `pop`, visitors summed
# over all rows of `parks`.
popyear = pop.groupby("year")["pop"].sum().reset_index()
df = parks.groupby("year")["visitors"].sum().reset_index()

# Keep only the years present in both tables and relabel the year column.
popyear = popyear.merge(df, how="inner", on="year").rename(columns={"year": "Year"})

# Express both quantities in millions.
popyear[["pop", "visitors"]] = popyear[["pop", "visitors"]] / 1000000
popyear.head(3)
| | Year | pop | visitors |
|---|---|---|---|
| 0 | 1904 | 82.165 | 0.120690 |
| 1 | 1905 | 83.818 | 0.140954 |
| 2 | 1906 | 85.439 | 0.030569 |
# Static scatter plot of yearly visitors against yearly population (both in
# millions), with the points coloured by year.
plt.rcParams["font.size"] = 15
plt.figure(figsize=(12, 7))
sns.set_style("whitegrid")

# seaborn returns the matplotlib Axes it drew on; it is stored under `fig`.
fig = sns.scatterplot(x="pop", y="visitors", hue="Year", data=popyear, palette="winter")

plt.title("Visitors to National Parks vs. US Population (1904-2015)", pad=10)
plt.xlabel("US Population (Millions)", labelpad=10)
plt.ylabel("Visitors to National Parks (Millions)", labelpad=10)

# Emits a blank line in the cell output before the figure is saved.
print()
plt.savefig("fig5_pop.jpeg")